clear all 
clear global

%paths
folderinput = '/Users/wittwer/Dropbox/5_Jason and Milena/data/Matlab_JMP_2R1/main_estimation_submission2R1/1_input';
folderfig   = '/Users/wittwer/Dropbox/5_Jason and Milena/data/Matlab_JMP_2R1/main_estimation_submission2R1/3_figures';
foldermats  = '/Users/wittwer/Dropbox/5_Jason and Milena/data/Matlab_JMP_2R1/main_estimation_submission2R1/2_output';


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%LOAD DATA
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% fix seed for the random number generator to ensure replicability
rand('twister', 1);

%%%% A) CHOOSE SPECIFICATION 

%Which model to estimate?     
homedealerL=1; 
    %=0 in benchmark model, 
    %=1 in the model with homedealers
 
    %CAREFUL with homedealerL=0, dailyL=1 
zerobenefit=0;  %is always 0 other than when running "zerobeneift" specification (b) below       
    %a) zerobenefit=0: 
    % To Run the code with the qualities from the market shares without homedealers, set homedealerL=0, dailyL=1 and keepallL=0. 
    
    %b) zerobenefit=1--this is used in the draft: 
    % To fix the base qualities across model specifications use homedealerL=1 to read in data. 
        %Then  the sigma and all xij's are set to the base quality below, xij_for_investorL is changed to 0, and homedealer when saving files, and ending: '_zerobenefit' is added
        
        %---> To use this specification to run counterfactuals, you must change homedealer=0 manually after reading in the data. 
        
%Retailer or institutional?
retailerL=1;
    %=1 for retailer
    %=0 for institutional

%Regular or large trades?
largetradeL=0;
    %=0 include all trades (benchmark)
    %=1 separate trades between large and regular

%Choose the period length (this is called "week" in the Stata dofile)    
dayL=1; 
    %=0 day
    %=1 week

%Choose which investorbase to use 
keepallL=1;
    %=0 only investors with LEIs and retailers 
    %=1 all investors

%Choose whether to use dealer-fes or not 
fejL=1; 
   %=0 without dealer fe
   %=1 with dealer fe
    
%%%% B) FIX SOME OTHER PARAMETERS

% These parameters were changed in working paper versions for robustness. This is no longer part of the draft.    
if homedealerL==1
        xij_for_investorL=0;  
    else 
        xij_for_investorL=1;  
end
    %=1 For benchmark model (homedealer=0) is the main specification: means price = theta + nu + xij 
    
    %=0 a) For benchmark model: means price = theta + nu 
       %b) For extended model (homedealer=1) is main specification in which the investor gets the homedealer benefit but pays the base quality on and off the platform                   
    %---> NOTE: if ETA>0, must choose: xij_for_investor=1 

     
etaL =0;
    %=0 for benchmark model
    %=0 for model with home dealer 
    %=0.25, 0.5, ... for robustnes of benchmark model (homedealer=0)

quantityrobusL=0;
    %=0 full data set 
    %=1 exclude 5% largest quantities
    

%%%%%%%%%%%%%%%%%%%%%%%%%
%%LOAD DATA
%%%%%%%%%%%%%%%%%%%%%%%%% 

%% MAIN DATA
temp=['/MatlabEstimationData_', num2str(dayL), ' _', num2str(homedealerL), ' _', num2str(keepallL),' _', num2str(quantityrobusL),' _', num2str(largetradeL), ' _',num2str(fejL) ];
a_num = importdata(fullfile([folderinput,temp,'.csv']));

data = a_num.data; 
data = sortrows(data, [1,16,17]); %sort Y on id and then dealer 
   
%structure of data and Y:
    %1)id 
    %2)period id 
    %3)retailer 
    %4)IIROC_SIDE  
    %5)theta 
    %6)large-trade indicator (=1 for trade size > 25 million)
    %7)NaN
    %8)IIROC_YIELD
    %9)venue
    %10)theta 
    %11)theta
    %12)s0j - platform market share for benchmark model, something weird for homedealer model
    %13)rhoj
    %14)xij
    %15)sigma
    %16)bidderid
    %17)countingj (# of line per dealer/id)
    %18)quote on CanDeal jt
    %19) feij
    %20) ctj
    %21) basedealer
    %22) homedealer ID
    %23) base quality (in model extension)
    %24) homedealer quality (in model extension)
    %25) homedealer or not dummy (in model extension)
    %26) countingjH (# of line per dealer-homedealer)
 
   
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% DECLARE GLOBALS AND SET PARAMETERS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
global D I T Imax Dmax basedealer xij_for_investor eta homedealer quantityrobus day keepall largetrade fej 
          
if zerobenefit==0
    xij_for_investor       = xij_for_investorL;
else 
    xij_for_investor    = 0;
end 
eta                          = etaL;
homedealer            = homedealerL;
quantityrobus         = quantityrobusL;
day                         = dayL;
keepall                   = keepallL; 
largetrade              = largetradeL;
retailer                   = retailerL;
fej                          = fejL;

spant   = 1; %how many days to pool --> currently all are 1


%CHOOSE SPECIFIACTION:  RETAILER EARN HOME DEALER BENEFIT OR NOT

%a) If retail investors earn the same home dealer beneift as institutional investors with LEIs, no change 
%b) If retail investors earn 0 home dealer benefit use the follow:
%data(data(:,3)==1, 24)=data(data(:,3)==1, 23);

    %---> CAREFUL: THIS MUST BE CHOSEN MANUALLY! (else there are too many global variables and specifications floating around)

tic

if largetrade==0, vT=0; end
if largetrade==1, vT=[0,1]; end

for tradesize=vT 
    %=0 regular sized trades (<25 million)
    %=1 large trades (>25 million)

%Loop over retailer and side
for side=[1 -1]

       
%Load dealer's valuation for retailers if eta>0
if eta>0 && retailer==1 && homedealer==0
    
    if side==1, vdjSQ=vdjSQ_IB;       
    elseif side==-1, vdjSQ=vdjSQ_IS; 
    end 
    
end 

%Initial parameters
mu = 0; sigma= 1; muc= 0; 
parameters=[mu,sigma,muc];
    

%%% KEEP DATA of institutional clients (or retailers), of the correct side of the trade
if largetrade==0 %all trades
    Y = data(data(:,3)==retailer & data(:,4)==side,:); %FIRST REPLICATE WHAT I HAD BEFORE 
elseif largetrade==1 %separate trades
    Y = data(data(:,3)==retailer & data(:,4)==side  & data(:,6)==tradesize,:);
end

%TO RUN HOME DEALER MODEL WITHOUT LOYALTY BENEFIT: 
if zerobenefit==1
    Y(:,15)=1/0.686;
    Y(:,14)=Y(:,23); %use only base qualities
    Y(:,24)=Y(:,23); %use only base qualities
end 

%Address for new periods
new_period = zeros(length(unique(Y(:,2))),2);
count=2;
new_period(1, :)=[Y(1,2),1];

for i=2:size(Y,1)
    if Y(i,2)~=Y(i-1,2)
    new_period(count,:) = [Y(i,2), i];  
    count=count+1;
    end
end 

new_period_end = new_period(2:end,2)-1;
new_period = [new_period, [new_period_end;size(Y,1)]];


%Define a vector with dates that are used in estimation, excluding 
        %1) Days with no trades 
        %2) Days with not enough OTC (this might happen for retailers)
        
T                 = length(unique(data(:,2)));          %number of trading dates of any group
Tg                = size(new_period,1);                 %number of trading dates of this investor group

%Sort
Y = sortrows(Y, [1,16,17]); %(Note: this is done in the mainfile as well, should not do anything)

%Relabel dealers 
Y(Y(:,16)==18,16)=1;
Y(Y(:,16)==24,16)=2;
Y(Y(:,16)==46,16)=3;
Y(Y(:,16)==61,16)=4;
Y(Y(:,16)==67,16)=5;
Y(Y(:,16)==77,16)=6;
Y(Y(:,16)==88,16)=7;
Y(Y(:,16)==93,16)=8;
Y(Y(:,16)==94,16)=9;


%baseline dealer 
basedealer = unique(Y(:,21)); 
Imax           = length(unique(Y(:,16)));
Dmax         = unique(Y(:,16));

  
%Determine the dealers each day flexibly
Notc= zeros(Tg,1);
for t=1:Tg
%for k=1:size(vec,2)
    D{t} = unique(Y(new_period(t,2):new_period(t,3),16));
    I{t} =size(D{t},1);
    Notc(t) = sum(Y(new_period(t,2):new_period(t,3),9)==1); %number of OTC trades
end 


Nt                   = new_period(:,3)-new_period(:,2)+1;  %number of trades on that day
exclude          = find(Notc<10);                      %index showing dates with <10 OTC trades
idx_missing_dates = find(~ismember(unique(data(:,2)), unique(Y(:,2))));
exclude          = [exclude; idx_missing_dates];
vecall             = setdiff(1:floor(T/spant),[exclude; idx_missing_dates]); %vector in T
vec                 = setdiff(1:floor(Tg/spant),exclude); %vector with t in Tg that will be estimated


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% ESTIMATION PROCEDURE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

if retailer==0
    lb = [-Inf, 0, -Inf];
    ub = [];
elseif retailer==1
     lb = [-Inf, 0];
     ub = [];
end 

options1 = optimoptions('fmincon' ,  'FiniteDifferenceType','central','Algorithm', 'interior-point');
options2 = optimoptions('fmincon' ,  'FiniteDifferenceType','central','Algorithm', 'interior-point', 'OptimalityTolerance',1e-40, 'StepTolerance', 1e-40, 'FunctionTolerance', 1e-40,   'ConstraintTolerance', 1e-40, 'MaxFunctionEvaluations', 10^6, 'MaxIterations', 10^6);  
options = options2;
warning('off','all')


%%%%%%%%%%%%%%%%%%%%%
% Estimate 
%%%%%%%%%%%%%%%%%%%%%

%initialize Mhat, and set the non-estimated days to NaN 
%(careful the size of Mhat is only the traded days of this group!
if retailer==0 Mhat = NaN*ones(Tg,3);
elseif retailer==1 Mhat = NaN*ones(Tg,2); end 
 
for k=1:size(vec,2)
    t=vec(k);
    %t=1+(w-1)*spant;
      
    idxt    = [new_period(t,2):new_period(t+spant-1,3)]'; %lines in Y
    n       = size(idxt,1); %number of trades
    idxotct = idxt(find(Y(idxt,9)==1)); %lines in Y with OTC trades
    
   
    if retailer==0
        candidate_indices=[1,2,3];
        vdjQt=[];
    elseif retailer==1 
        candidate_indices=[1,2];
        dates=unique(Y(idxt,2));
        vdjQt=[];
        %fill in with valuations for eta>0
        if side==1 && eta>0
            vdjQt=vdjSQ_IB(vdjSQ_IB(:,1)==dates, 2:end);
        elseif side==-1 && eta>0
            vdjQt=vdjSQ_IS(vdjSQ_IS(:,1)==dates,2:end);  
        end 
    end
    
    %Gridsearch (using the benchmak model)
    Npoints=20;
    [x_min(t,:) f_min(t,:)] = gridsearch_avg(candidate_indices, parameters, Y(idxt,:), vdjQt, t,  side, retailer,  Npoints,1);

    %First step
    star_param = parameters;
    star_param(candidate_indices) =x_min(t,:);
    
    %model extension with homedealer
    if homedealer==0 && eta==0
        [Mhat0t(t,:), fval0t(t,:),flag0t(t,:),output0t{t}] = fmincon(@(z)gmm_fun_y_c_avg(z, candidate_indices, star_param,  Y(idxt,:), t,side, retailer, 1,0),star_param(candidate_indices),[], [], [], [], lb,ub,[], options);
        [objfun0(t), W0{t}, g0{t}]=gmm_fun_y_c_avg(Mhat0t(t,:), candidate_indices, star_param,  Y(idxt,:),t, side, retailer, 1,0);
    elseif homedealer==1 && eta==0
        [Mhat0t(t,:), fval0t(t,:),flag0t(t,:),output0t{t}] = fmincon(@(z)gmm_fun_y_c_avg_eta_H(z, candidate_indices, star_param,  Y(idxt,:), vdjQt, t,side, retailer, 1),star_param(candidate_indices),[], [], [], [], lb,ub,[], options);
        [objfun0(t), W0{t}, g0{t}]=gmm_fun_y_c_avg_eta_H(Mhat0t(t,:), candidate_indices, star_param,  Y(idxt,:),vdjQt, t, side, retailer, 1);
    elseif homedealer==0 && eta>0
        [Mhat0t(t,:), fval0t(t,:),flag0t(t,:),output0t{t}] = fmincon(@(z)gmm_fun_y_c_avg_eta(z, candidate_indices, star_param,  Y(idxt,:), vdjQt, t,side, retailer, 1),star_param(candidate_indices),[], [], [], [], lb,ub,[], options);
        [objfun0(t), W0{t}, g0{t}]=gmm_fun_y_c_avg_eta_H(Mhat0t(t,:), candidate_indices, star_param,  Y(idxt,:),vdjQt, t, side, retailer, 1);
   end

    %Second step
    star_param = parameters;
    star_param(candidate_indices) = Mhat0t(t,:);
    
    %with eta
    if homedealer==0 && eta==0
        [Mhat(t,:),fvalt(t,:),flagt(t,:),outputt{t}, lambda,grad{t},hessian{t}] = fmincon(@(z)gmm_fun_y_c_avg(z, candidate_indices, star_param,  Y(idxt,:), t,side, retailer, 2,0),star_param(candidate_indices),[], [], [], [], lb,ub,[], options); 
        star_param = parameters;
        star_param(candidate_indices) = Mhat(t,:);
        [objfun(t), W{t}, g{t}]=gmm_fun_y_c_avg(Mhat(t,:), candidate_indices, star_param,  Y(idxt,:), t,side, retailer, 2,0);
        
    elseif homedealer==0 && eta>0
        [Mhat(t,:),fvalt(t,:),flagt(t,:),outputt{t}, lambda,grad{t},hessian{t}] = fmincon(@(z)gmm_fun_y_c_avg_eta(z, candidate_indices, star_param,  Y(idxt,:),vdjQt, t,side, retailer, 2),star_param(candidate_indices),[], [], [], [], lb,ub,[], options);
        star_param = parameters;
        star_param(candidate_indices) = Mhat(t,:);
        [objfun(t), W{t}, g{t}]=gmm_fun_y_c_avg_eta(Mhat(t,:), candidate_indices, star_param,  Y(idxt,:), vdjQt, t,side, retailer, 2);
    
    elseif homedealer==1 && eta==0
        [Mhat(t,:),fvalt(t,:),flagt(t,:),outputt{t}, lambda,grad{t},hessian{t}] = fmincon(@(z)gmm_fun_y_c_avg_eta_H(z, candidate_indices, star_param,  Y(idxt,:), vdjQt, t,side, retailer, 2),star_param(candidate_indices),[], [], [], [], lb,ub,[], options); 
        star_param = parameters;
        star_param(candidate_indices) = Mhat(t,:);
        [objfun(t), W{t}, g{t}]=gmm_fun_y_c_avg_eta_H(Mhat(t,:), candidate_indices, star_param,  Y(idxt,:), vdjQt, t,side, retailer, 2);
   
    end
    
    %Standard errors for benchmark model 
    V{t} = gradient_fun_y3_avg(star_param, Y(idxt,:),vdjQt, t, retailer, side);
    SE(t,:)= sqrt(1/n*diag(V{t}));
    
    %Confidence interval:
    CI_U(t,:) = Mhat(t,:) + SE(t,:).*1.96 ;
    CI_L(t,:) = Mhat(t,:) - SE(t,:).*1.96 ;
    
    %P-value
    t_stats(t,:) = Mhat(t,:)./SE(t,:);
    param  = length(Mhat(t,:)); %number of parameters
    p(t,:) = 1-tcdf(abs(t_stats(t,:)), n - param-1);
    
end 

%Check the first-order optimality
for k=1:size(vec,2)
    w=vec(k);
    t=1+(w-1)*spant; 
    %to check the first order opt
    out(t)=outputt{1,t}.firstorderopt;
    out0(t)=output0t{1,t}.firstorderopt;
    gvec(t,:) = g{t};   
end 


if zerobenefit==0 %normal
    temp=['/momentsatestimate',['_',num2str(side),'_' num2str(retailer),'_',num2str(day), '_', num2str(homedealer), '_', num2str(keepall),'_', num2str(quantityrobus), '_', num2str(largetrade), '_', num2str(tradesize),'_',num2str(fej) ]];
else %fixing qualitities but zero home dealer
    temp=['/momentsatestimate',['_',num2str(side),'_' num2str(retailer),'_',num2str(day), '_', num2str(0), '_', num2str(keepall),'_', num2str(quantityrobus), '_', num2str(largetrade), '_', num2str(tradesize),'_',num2str(fej),'_zerobenefit']]; 
end 

save(fullfile([foldermats,temp,'.mat']))
 

 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% STATUS QUO: Model predictions 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    %Note: This is just so that one can easily run
    %plot_estimation_results.m and produce the output estimation tables as
    %in the draft. The SQ is computed again on mainfile_combine to prepare
    %for the counterfactual analysis. 

if retailer==0 
    
    SQ=0;                %means use the estimated value for the dealer
    CF=0;                %means use esitmated cost of entry
    access=1;

    [psiSQ,rhojSQ,sjSQ,sHjSQ, sNHjSQ, s0jSQ, s0HjSQ, s0NHjSQ, etaEjSQ,etaDjSQ,piDprimejSQ,dealer_idsSQ,uBSQ,uESQ,piBSQ,piESQ,YBSQ,YESQ,EcostSQ,...
        q_data, rhoj_data, s0j_data,xij_data, YBno, EUno, uE2, psi_opt, rhoj_opt, vdjSQ, WSQ, WxijSQ, WvdjSQ, WxijHSQ, WSQwH, WxijSQwH, WvdjSQwH, WxijHSQwH] = status_quo_fun(SQ, CF, side, access, Y, new_period, new_period, [], 1, Mhat);
   
    %sort according to dealers:
    xij    = xij_data;
    med_xij=nanmedian(xij_data);
    cols = [3,9,4,1,2,5,8,6,7];
end 
 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% SAVE ESTIMATES 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

if zerobenefit==0
    temp=['/estimation',['_',num2str(side),'_' num2str(retailer),'_',num2str(day), '_', num2str(homedealer), '_', num2str(keepall),'_', num2str(quantityrobus),'_', num2str(largetrade), '_', num2str(tradesize),'_',num2str(fej) ]];
else 
    temp=['/estimation',['_',num2str(side),'_' num2str(retailer),'_',num2str(day), '_', num2str(0), '_', num2str(keepall),'_', num2str(quantityrobus),'_', num2str(largetrade), '_', num2str(tradesize),'_',num2str(fej) , '_zerobenefit']];
end 

save(fullfile([foldermats,temp,'.mat']))
 
%clear variables for next loop
if eta>0 && retailer==1
    clearvars -except data D I T Imax Dmax basedealer   spant side retailer folderfig foldermats xij_for_investor eta homedealer quantityrobus day vdjSQ_IS vdjSQ_IB largetrade keepall tradesize fej zerobenefit
else 
    clearvars -except data D I T Imax Dmax basedealer   spant side retailer folderfig foldermats xij_for_investor eta homedealer quantityrobus day largetrade keepall tradesize fej zerobenefit
end

end
end 

toc

